ng911ok.lib.validation module#

ng911ok.lib.validation – Support for validation logic

Notes#

synopsis:

authors:

Riley Baird (OK), Emma Baker (OK)

created:

September 03, 2024

modified:

December 16, 2024

class FeatureAttributeErrorMessage(severity: Literal['Notice', 'Warning', 'Error'], code: T_Code, layer1: str, nguid1: str, field1: str, value1: str | int | float | datetime | date | time | Geometry | None, layer2: str | None, nguid2: str | None, field2: str | None, value2: str | int | float | datetime | date | time | Geometry | None, message: str | Callable[[Self], str])#

Bases: _ValidationErrorInfo[Literal[‘ERROR:DOMAIN:INVALID_VALUE’, ‘ERROR:GENERAL:INVALID_VALUE’, ‘ERROR:GENERAL:MANDATORY_IS_NULL’, ‘ERROR:GENERAL:MANDATORY_IS_BLANK’, ‘ERROR:GENERAL:NOT_UPPERCASE’, ‘ERROR:GENERAL:UNIQUENESS’, ‘WARNING:GENERAL:LEADING_TRAILING_SPACE’, ‘ERROR:NGUID:FORMAT’, ‘ERROR:NGUID:V2_FORMAT’, ‘ERROR:NGUID:AGENCY’, ‘ERROR:NGUID:LAYER’, ‘ERROR:NGUID:DUPLICATE’, ‘ERROR:ADDRESS:DUPLICATE’, ‘ERROR:ADDRESS_RANGE:OVERLAP’, ‘ERROR:ADDRESS_RANGE:DECREASING’, ‘ERROR:ROAD_ESN:DEVIATION’, ‘ERROR:ROAD_ESN:CROSSING’, ‘ERROR:ROAD_ESN:OUT_OF_BOUNDS’, ‘ERROR:PARITY:EXPECTED_ZERO’, ‘ERROR:PARITY:EXPECTED_NONZERO’, ‘ERROR:PARITY:MISMATCH’, ‘ERROR:PARITY:INVALID’, ‘ERROR:PARITY:NULL’, ‘ERROR:LEGACY:MISMATCH’, ‘ERROR:GEOCODE:UNKNOWN_MATCH’, ‘ERROR:GEOCODE:WRONG_SIDE’, ‘ERROR:GEOCODE:BOTH_SIDES’, ‘ERROR:GEOCODE:WRONG_COMMUNITY’, ‘ERROR:GEOCODE:OUT_OF_RANGE’, ‘ERROR:GEOCODE:NAME_MISMATCH’, ‘ERROR:CONSISTENCY:ADDRESS_ESN’, ‘ERROR:CONSISTENCY:ROAD_ESN’, ‘ERROR:CONSISTENCY:COMMUNITY’, ‘WARNING:CONSISTENCY:ROAD_LEVEL’, ‘WARNING:CONSISTENCY:ROAD_ESN’, ‘ERROR:GEOMETRY:TOPOLOGY’, ‘WARNING:GEOMETRY:CUTBACK’, ‘WARNING:GEOMETRY:SHORT_SEGMENT’, ‘NOTICE:CONSISTENCY:ROAD_ESN’]], ValidationErrorMessage[Literal[‘ERROR:DOMAIN:INVALID_VALUE’, ‘ERROR:GENERAL:INVALID_VALUE’, ‘ERROR:GENERAL:MANDATORY_IS_NULL’, ‘ERROR:GENERAL:MANDATORY_IS_BLANK’, ‘ERROR:GENERAL:NOT_UPPERCASE’, ‘ERROR:GENERAL:UNIQUENESS’, ‘WARNING:GENERAL:LEADING_TRAILING_SPACE’, ‘ERROR:NGUID:FORMAT’, ‘ERROR:NGUID:V2_FORMAT’, ‘ERROR:NGUID:AGENCY’, ‘ERROR:NGUID:LAYER’, ‘ERROR:NGUID:DUPLICATE’, ‘ERROR:ADDRESS:DUPLICATE’, ‘ERROR:ADDRESS_RANGE:OVERLAP’, ‘ERROR:ADDRESS_RANGE:DECREASING’, ‘ERROR:ROAD_ESN:DEVIATION’, ‘ERROR:ROAD_ESN:CROSSING’, ‘ERROR:ROAD_ESN:OUT_OF_BOUNDS’, ‘ERROR:PARITY:EXPECTED_ZERO’, ‘ERROR:PARITY:EXPECTED_NONZERO’, ‘ERROR:PARITY:MISMATCH’, ‘ERROR:PARITY:INVALID’, ‘ERROR:PARITY:NULL’, ‘ERROR:LEGACY:MISMATCH’, ‘ERROR:GEOCODE:UNKNOWN_MATCH’, ‘ERROR:GEOCODE:WRONG_SIDE’, ‘ERROR:GEOCODE:BOTH_SIDES’, 
‘ERROR:GEOCODE:WRONG_COMMUNITY’, ‘ERROR:GEOCODE:OUT_OF_RANGE’, ‘ERROR:GEOCODE:NAME_MISMATCH’, ‘ERROR:CONSISTENCY:ADDRESS_ESN’, ‘ERROR:CONSISTENCY:ROAD_ESN’, ‘ERROR:CONSISTENCY:COMMUNITY’, ‘WARNING:CONSISTENCY:ROAD_LEVEL’, ‘WARNING:CONSISTENCY:ROAD_ESN’, ‘ERROR:GEOMETRY:TOPOLOGY’, ‘WARNING:GEOMETRY:CUTBACK’, ‘WARNING:GEOMETRY:SHORT_SEGMENT’, ‘NOTICE:CONSISTENCY:ROAD_ESN’]]

classmethod from_df(data: DataFrame, validity: DataFrame, severity: Literal['Notice', 'Warning', 'Error'], code: Literal['ERROR:DOMAIN:INVALID_VALUE', 'ERROR:GENERAL:INVALID_VALUE', 'ERROR:GENERAL:MANDATORY_IS_NULL', 'ERROR:GENERAL:MANDATORY_IS_BLANK', 'ERROR:GENERAL:NOT_UPPERCASE', 'ERROR:GENERAL:UNIQUENESS', 'WARNING:GENERAL:LEADING_TRAILING_SPACE', 'ERROR:NGUID:FORMAT', 'ERROR:NGUID:V2_FORMAT', 'ERROR:NGUID:AGENCY', 'ERROR:NGUID:LAYER', 'ERROR:NGUID:DUPLICATE', 'ERROR:ADDRESS:DUPLICATE', 'ERROR:ADDRESS_RANGE:OVERLAP', 'ERROR:ADDRESS_RANGE:DECREASING', 'ERROR:ROAD_ESN:DEVIATION', 'ERROR:ROAD_ESN:CROSSING', 'ERROR:ROAD_ESN:OUT_OF_BOUNDS', 'ERROR:PARITY:EXPECTED_ZERO', 'ERROR:PARITY:EXPECTED_NONZERO', 'ERROR:PARITY:MISMATCH', 'ERROR:PARITY:INVALID', 'ERROR:PARITY:NULL', 'ERROR:LEGACY:MISMATCH', 'ERROR:GEOCODE:UNKNOWN_MATCH', 'ERROR:GEOCODE:WRONG_SIDE', 'ERROR:GEOCODE:BOTH_SIDES', 'ERROR:GEOCODE:WRONG_COMMUNITY', 'ERROR:GEOCODE:OUT_OF_RANGE', 'ERROR:GEOCODE:NAME_MISMATCH', 'ERROR:CONSISTENCY:ADDRESS_ESN', 'ERROR:CONSISTENCY:ROAD_ESN', 'ERROR:CONSISTENCY:COMMUNITY', 'WARNING:CONSISTENCY:ROAD_LEVEL', 'WARNING:CONSISTENCY:ROAD_ESN', 'ERROR:GEOMETRY:TOPOLOGY', 'WARNING:GEOMETRY:CUTBACK', 'WARNING:GEOMETRY:SHORT_SEGMENT', 'NOTICE:CONSISTENCY:ROAD_ESN'], layer: str, message: str | Callable[[Self], str]) list[Self]#

Generates error messages for many attributes at once. Takes a pd.DataFrame containing the data (data) and a second pd.DataFrame of bool values (validity), whose shape and index correspond to those of data, indicating whether each value in data is valid.

Previously, the following applied:

The data and validity arguments must have an index that is also equivalent to a column in the data. This can be accomplished by calling, e.g., df.set_index(index_column_name, drop=False) before passing df as the data or validity argument.

classmethod from_df_two_fields(data: DataFrame, validity: Series, field1: str, field2: str, severity: Literal['Notice', 'Warning', 'Error'], code: Literal['ERROR:DOMAIN:INVALID_VALUE', 'ERROR:GENERAL:INVALID_VALUE', 'ERROR:GENERAL:MANDATORY_IS_NULL', 'ERROR:GENERAL:MANDATORY_IS_BLANK', 'ERROR:GENERAL:NOT_UPPERCASE', 'ERROR:GENERAL:UNIQUENESS', 'WARNING:GENERAL:LEADING_TRAILING_SPACE', 'ERROR:NGUID:FORMAT', 'ERROR:NGUID:V2_FORMAT', 'ERROR:NGUID:AGENCY', 'ERROR:NGUID:LAYER', 'ERROR:NGUID:DUPLICATE', 'ERROR:ADDRESS:DUPLICATE', 'ERROR:ADDRESS_RANGE:OVERLAP', 'ERROR:ADDRESS_RANGE:DECREASING', 'ERROR:ROAD_ESN:DEVIATION', 'ERROR:ROAD_ESN:CROSSING', 'ERROR:ROAD_ESN:OUT_OF_BOUNDS', 'ERROR:PARITY:EXPECTED_ZERO', 'ERROR:PARITY:EXPECTED_NONZERO', 'ERROR:PARITY:MISMATCH', 'ERROR:PARITY:INVALID', 'ERROR:PARITY:NULL', 'ERROR:LEGACY:MISMATCH', 'ERROR:GEOCODE:UNKNOWN_MATCH', 'ERROR:GEOCODE:WRONG_SIDE', 'ERROR:GEOCODE:BOTH_SIDES', 'ERROR:GEOCODE:WRONG_COMMUNITY', 'ERROR:GEOCODE:OUT_OF_RANGE', 'ERROR:GEOCODE:NAME_MISMATCH', 'ERROR:CONSISTENCY:ADDRESS_ESN', 'ERROR:CONSISTENCY:ROAD_ESN', 'ERROR:CONSISTENCY:COMMUNITY', 'WARNING:CONSISTENCY:ROAD_LEVEL', 'WARNING:CONSISTENCY:ROAD_ESN', 'ERROR:GEOMETRY:TOPOLOGY', 'WARNING:GEOMETRY:CUTBACK', 'WARNING:GEOMETRY:SHORT_SEGMENT', 'NOTICE:CONSISTENCY:ROAD_ESN'], layer: str, message: str | Callable[[Self], str]) list[Self]#

Similar to from_df(), but generates error messages involving two fields in a single feature class. Unlike in from_df(), however, validity should be a Series, not a DataFrame, and it should represent the validity of the two fields by row. For rows where validity is False, the values in the columns named field1 and field2 in data will be used to generate the error messages.

In each resulting instance, layer2 and nguid2 will be None.

classmethod from_joined_df(data: DataFrame, column_pairs: dict[str, str], use_left_column_names: bool, severity: Literal['Notice', 'Warning', 'Error'], code: Literal['ERROR:DOMAIN:INVALID_VALUE', 'ERROR:GENERAL:INVALID_VALUE', 'ERROR:GENERAL:MANDATORY_IS_NULL', 'ERROR:GENERAL:MANDATORY_IS_BLANK', 'ERROR:GENERAL:NOT_UPPERCASE', 'ERROR:GENERAL:UNIQUENESS', 'WARNING:GENERAL:LEADING_TRAILING_SPACE', 'ERROR:NGUID:FORMAT', 'ERROR:NGUID:V2_FORMAT', 'ERROR:NGUID:AGENCY', 'ERROR:NGUID:LAYER', 'ERROR:NGUID:DUPLICATE', 'ERROR:ADDRESS:DUPLICATE', 'ERROR:ADDRESS_RANGE:OVERLAP', 'ERROR:ADDRESS_RANGE:DECREASING', 'ERROR:ROAD_ESN:DEVIATION', 'ERROR:ROAD_ESN:CROSSING', 'ERROR:ROAD_ESN:OUT_OF_BOUNDS', 'ERROR:PARITY:EXPECTED_ZERO', 'ERROR:PARITY:EXPECTED_NONZERO', 'ERROR:PARITY:MISMATCH', 'ERROR:PARITY:INVALID', 'ERROR:PARITY:NULL', 'ERROR:LEGACY:MISMATCH', 'ERROR:GEOCODE:UNKNOWN_MATCH', 'ERROR:GEOCODE:WRONG_SIDE', 'ERROR:GEOCODE:BOTH_SIDES', 'ERROR:GEOCODE:WRONG_COMMUNITY', 'ERROR:GEOCODE:OUT_OF_RANGE', 'ERROR:GEOCODE:NAME_MISMATCH', 'ERROR:CONSISTENCY:ADDRESS_ESN', 'ERROR:CONSISTENCY:ROAD_ESN', 'ERROR:CONSISTENCY:COMMUNITY', 'WARNING:CONSISTENCY:ROAD_LEVEL', 'WARNING:CONSISTENCY:ROAD_ESN', 'ERROR:GEOMETRY:TOPOLOGY', 'WARNING:GEOMETRY:CUTBACK', 'WARNING:GEOMETRY:SHORT_SEGMENT', 'NOTICE:CONSISTENCY:ROAD_ESN'], index_layer: str, join_layer: str, join_layer_id_name: str, message: str | Callable[[Self], str]) list[Self]#

Similar to from_df(), but generates error messages for situations that meet the following criteria:

  • Data from two different feature classes (“left FC”/index_layer and “right FC”/join_layer) are joined in a single data frame

  • Columns are paired in the sense that a value from a column from left FC should equal the value in the corresponding column from right FC

  • The values of each column named by a key of column_pairs should equal the values of the column named by the corresponding value of column_pairs

  • The index of data is the index/NGUID of left FC

  • The index/NGUID of right FC is provided as a column (join_layer_id_name)

This method has no validity parameter; data validity is computed automatically.

Example:

left_df = pd.DataFrame({
    "nguid": ["left1", "left2", "left3", "left4"],
    "street": ["PECAN", "WALNUT", "CHERRY", "MAPLE"],
    "streettype": ["STREET", "AVENUE", "DRIVE", "BOULEVARD"],
    "match_key": ["right1", "right2", "right3", "right4"]
}).set_index("nguid", drop=False)
right_df = pd.DataFrame({
    "nguid": ["right1", "right2", "right3", "right4"],
    "street": ["PECAN", "WALNUT", "CHERRY", "CHESTNUT"],
    "streettype": ["STREET", "AVE", "DRIVE", "BOULEVARD"]
}).set_index("nguid", drop=False)
data = left_df.join(right_df, on="match_key", rsuffix="_right")
left_columns = ["street", "streettype"]
right_columns = [f"{col}_right" for col in left_columns]
validity: pd.DataFrame = data.applymap(lambda _: True)
validity.loc[:, left_columns] = data[left_columns].values == data[right_columns].values
Parameters:
  • data (pd.DataFrame) – The input data frame

  • column_pairs (dict[str, str]) – Mapping of corresponding column names as {left: right}

  • use_left_column_names (bool) – Whether to use the keys in column_pairs for both field1 and field2 in the output

  • severity (Severity) – The severity of the messages

  • code (FeatureAttributeErrorCode) – The specific error code of the messages

  • index_layer (str) – The name of the layer on the left side of the join which retains an NGUID as the index of data

  • join_layer (str) – The name of the layer on the right side of the join that produced data

  • join_layer_id_name (str) – The name of the column (not necessarily field) containing NGUIDs for join_layer

  • message (Union[str, ValidationErrorMessageFunction[FeatureAttributeErrorInfo]]) – Message or message-generating function to be passed to the output

Returns:

Derived feature attribute errors

Return type:

list[Self]

classmethod one_feature(severity: Literal['Notice', 'Warning', 'Error'], code: Literal['ERROR:DOMAIN:INVALID_VALUE', 'ERROR:GENERAL:INVALID_VALUE', 'ERROR:GENERAL:MANDATORY_IS_NULL', 'ERROR:GENERAL:MANDATORY_IS_BLANK', 'ERROR:GENERAL:NOT_UPPERCASE', 'ERROR:GENERAL:UNIQUENESS', 'WARNING:GENERAL:LEADING_TRAILING_SPACE', 'ERROR:NGUID:FORMAT', 'ERROR:NGUID:V2_FORMAT', 'ERROR:NGUID:AGENCY', 'ERROR:NGUID:LAYER', 'ERROR:NGUID:DUPLICATE', 'ERROR:ADDRESS:DUPLICATE', 'ERROR:ADDRESS_RANGE:OVERLAP', 'ERROR:ADDRESS_RANGE:DECREASING', 'ERROR:ROAD_ESN:DEVIATION', 'ERROR:ROAD_ESN:CROSSING', 'ERROR:ROAD_ESN:OUT_OF_BOUNDS', 'ERROR:PARITY:EXPECTED_ZERO', 'ERROR:PARITY:EXPECTED_NONZERO', 'ERROR:PARITY:MISMATCH', 'ERROR:PARITY:INVALID', 'ERROR:PARITY:NULL', 'ERROR:LEGACY:MISMATCH', 'ERROR:GEOCODE:UNKNOWN_MATCH', 'ERROR:GEOCODE:WRONG_SIDE', 'ERROR:GEOCODE:BOTH_SIDES', 'ERROR:GEOCODE:WRONG_COMMUNITY', 'ERROR:GEOCODE:OUT_OF_RANGE', 'ERROR:GEOCODE:NAME_MISMATCH', 'ERROR:CONSISTENCY:ADDRESS_ESN', 'ERROR:CONSISTENCY:ROAD_ESN', 'ERROR:CONSISTENCY:COMMUNITY', 'WARNING:CONSISTENCY:ROAD_LEVEL', 'WARNING:CONSISTENCY:ROAD_ESN', 'ERROR:GEOMETRY:TOPOLOGY', 'WARNING:GEOMETRY:CUTBACK', 'WARNING:GEOMETRY:SHORT_SEGMENT', 'NOTICE:CONSISTENCY:ROAD_ESN'], layer1: str, nguid1: str, field1: str, value1: str | int | float | datetime | date | time | Geometry | None, message: str | Callable[[Self], str]) Self#
to_series(timestamp: datetime) Series#
_abc_impl = <_abc._abc_data object>#
_is_protocol = False#
_message: str | Callable[[Self], str]#
property feature_count: Literal[0, 1, 2]#
field1: str#
field2: str | None#
layer1: str#
layer2: str | None#
message: str#
nguid1: str#
nguid2: str | None#
value1: str | int | float | datetime | date | time | Geometry | None#
value2: str | int | float | datetime | date | time | Geometry | None#
class GDBErrorMessage(severity: Literal['Notice', 'Warning', 'Error'], code: T_Code, layer: str | None = None, field: str | None = None, message: str | Callable[[Self], str] = None)#

Bases: _ValidationErrorInfo[Literal[‘ERROR:PYTHON:EXCEPTION’, ‘ERROR:GDB:MISSING_REQUIRED_DATASET’, ‘ERROR:GDB:MISSING_REQUIRED_FEATURE_CLASS’, ‘ERROR:GDB:EXTRA_ITEM’, ‘ERROR:GDB:MISSING_DOMAIN’, ‘ERROR:GDB:EXTRA_DOMAIN’, ‘ERROR:GDB:INCORRECT_DOMAIN_TYPE’, ‘ERROR:GDB:DOMAIN_MISSING_CODE’, ‘ERROR:GDB:DOMAIN_EXTRA_CODE’, ‘ERROR:GDB:DOMAIN_CODE_VALUE_MISMATCH’, ‘ERROR:GDB:INCORRECT_DOMAIN_DESCRIPTION’, ‘ERROR:DATASET:INCORRECT_SPATIAL_REFERENCE’, ‘ERROR:DATASET:MISSING_TOPOLOGY’, ‘ERROR:DATASET:INCORRECT_TOPOLOGY’, ‘ERROR:DATASET:TOPOLOGY_VIOLATION’, ‘ERROR:FEATURE_CLASS:MISSING_REQUIRED_FIELD’, ‘ERROR:FEATURE_CLASS:EXTRA_FIELD’, ‘ERROR:FEATURE_CLASS:INCORRECT_SPATIAL_REFERENCE’, ‘ERROR:FEATURE_CLASS:INCORRECT_GEOMETRY_TYPE’, ‘ERROR:FEATURE_CLASS:INCORRECT_FEATURE_TYPE’, ‘ERROR:FEATURE_CLASS:EMPTY’, ‘ERROR:FEATURE_CLASS:EMPTY_SUBMISSION’, ‘ERROR:FIELD:INCORRECT_FIELD_TYPE’, ‘ERROR:FIELD:INCORRECT_FIELD_LENGTH’, ‘ERROR:FIELD:INCORRECT_FIELD_DOMAIN’, ‘NOTICE:GDB:MISSING_OPTIONAL_DATASET’, ‘NOTICE:GDB:MISSING_OPTIONAL_FEATURE_CLASS’]], ValidationErrorMessage[Literal[‘ERROR:PYTHON:EXCEPTION’, ‘ERROR:GDB:MISSING_REQUIRED_DATASET’, ‘ERROR:GDB:MISSING_REQUIRED_FEATURE_CLASS’, ‘ERROR:GDB:EXTRA_ITEM’, ‘ERROR:GDB:MISSING_DOMAIN’, ‘ERROR:GDB:EXTRA_DOMAIN’, ‘ERROR:GDB:INCORRECT_DOMAIN_TYPE’, ‘ERROR:GDB:DOMAIN_MISSING_CODE’, ‘ERROR:GDB:DOMAIN_EXTRA_CODE’, ‘ERROR:GDB:DOMAIN_CODE_VALUE_MISMATCH’, ‘ERROR:GDB:INCORRECT_DOMAIN_DESCRIPTION’, ‘ERROR:DATASET:INCORRECT_SPATIAL_REFERENCE’, ‘ERROR:DATASET:MISSING_TOPOLOGY’, ‘ERROR:DATASET:INCORRECT_TOPOLOGY’, ‘ERROR:DATASET:TOPOLOGY_VIOLATION’, ‘ERROR:FEATURE_CLASS:MISSING_REQUIRED_FIELD’, ‘ERROR:FEATURE_CLASS:EXTRA_FIELD’, ‘ERROR:FEATURE_CLASS:INCORRECT_SPATIAL_REFERENCE’, ‘ERROR:FEATURE_CLASS:INCORRECT_GEOMETRY_TYPE’, ‘ERROR:FEATURE_CLASS:INCORRECT_FEATURE_TYPE’, ‘ERROR:FEATURE_CLASS:EMPTY’, ‘ERROR:FEATURE_CLASS:EMPTY_SUBMISSION’, ‘ERROR:FIELD:INCORRECT_FIELD_TYPE’, ‘ERROR:FIELD:INCORRECT_FIELD_LENGTH’, 
‘ERROR:FIELD:INCORRECT_FIELD_DOMAIN’, ‘NOTICE:GDB:MISSING_OPTIONAL_DATASET’, ‘NOTICE:GDB:MISSING_OPTIONAL_FEATURE_CLASS’]]

to_series(timestamp: datetime) Series#
_abc_impl = <_abc._abc_data object>#
_is_protocol = False#
_message: str | Callable[[Self], str]#
field: str | None#
layer: str | None#
message: str#
class ValidationErrorMessage(*args, **kwargs)#

Bases: Protocol[T_Code]

abstractmethod to_series(timestamp: datetime) Series#
_abc_impl = <_abc._abc_data object>#
_is_protocol = True#
_message: str | Callable[[Self], str]#
code: T_Code#
message: str#
severity: Literal['Notice', 'Warning', 'Error']#
class _ValidationErrorInfo(severity: Literal['Notice', 'Warning', 'Error'], code: T_Code)#

Bases: Generic[T_Code]

code: T_Code#
severity: Literal['Notice', 'Warning', 'Error']#